In this notebook, we read the 'lif' files that resulted from manual annotation in labelImg. Each file provides a bounding box that marks the 'OS' in the image. We want to use these boxes as input for a classifier that segments the OS. We can generate a training set from patches centered on the center of the bbox (positives) and patches that lie completely outside of the bbox (negatives). The resulting classifier can then classify a pixel by looking at the patch around it, which gives a 'heatmap' of OS-ness over the pixels. Hopefully we can take the maximum of the heatmap as the center of the OS.
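As a rough illustration of that patch idea (a minimal sketch with assumed parameters such as patch_size and the number of negatives per image; not the final pipeline), positive and negative patches could be sampled like this:
In [ ]:
import numpy as np

def sample_patches(img, bbox, patch_size=64, n_negative=4, rng=None):
    """Return one patch centered on the bbox center and a few patches
    whose centers fall outside the bbox (assumed sampling strategy)."""
    rng = rng or np.random.default_rng()
    h, w = img.shape[:2]
    half = patch_size // 2
    cx = (bbox['xmin'] + bbox['xmax']) // 2
    cy = (bbox['ymin'] + bbox['ymax']) // 2
    # positive patch around the bbox center, clipped to the image border
    x0 = int(np.clip(cx - half, 0, w - patch_size))
    y0 = int(np.clip(cy - half, 0, h - patch_size))
    positive = img[y0:y0 + patch_size, x0:x0 + patch_size]
    # negative patches: rejection-sample centers until they fall outside the bbox
    negatives = []
    while len(negatives) < n_negative:
        nx = int(rng.integers(half, w - half))
        ny = int(rng.integers(half, h - half))
        if bbox['xmin'] <= nx <= bbox['xmax'] and bbox['ymin'] <= ny <= bbox['ymax']:
            continue
        negatives.append(img[ny - half:ny + half, nx - half:nx + half])
    return positive, negatives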
In [20]:
import matplotlib.pyplot as plt
%matplotlib inline
from bs4 import BeautifulSoup
import os
import cv2
In [19]:
lif_path = '/media/sf_VBox_Shared/kaggle/cervical-cancer/labels/'
image_path = '/media/sf_VBox_Shared/kaggle/cervical-cancer/processed/'
types = ['Type_1', 'Type_2', 'Type_3']
In [ ]:
def get_bbox(path):
    """Parse a single annotation file and return its bounding box as a dict."""
    with open(path, 'r') as f:
        soup = BeautifulSoup(f, 'lxml')
    box = soup.find('bndbox')
    keys = ['xmin', 'xmax', 'ymin', 'ymax']
    return {key: int(box.find(key).contents[0]) for key in keys}
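To make the parsing concrete: get_bbox looks for a 'bndbox' element with xmin/xmax/ymin/ymax children. A minimal hand-written snippet (illustrative only, not a real annotation file) parses like this:
In [ ]:
example = '''
<annotation>
  <object>
    <bndbox>
      <xmin>100</xmin><xmax>300</xmax>
      <ymin>150</ymin><ymax>350</ymax>
    </bndbox>
  </object>
</annotation>
'''
soup = BeautifulSoup(example, 'lxml')
box = soup.find('bndbox')
{key: int(box.find(key).contents[0]) for key in ['xmin', 'xmax', 'ymin', 'ymax']}
# -> {'xmin': 100, 'xmax': 300, 'ymin': 150, 'ymax': 350}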
In [60]:
bboxes = []
for typ in types:
    for fn in os.listdir(os.path.join(lif_path, typ)):
        bbox = get_bbox(os.path.join(lif_path, typ, fn))
        bbox['width'] = bbox['xmax'] - bbox['xmin']
        bbox['height'] = bbox['ymax'] - bbox['ymin']
        bbox['area'] = bbox['width'] * bbox['height']
        fn_image = fn.replace('.lif', '.jpg')
        img = cv2.imread(os.path.join(image_path, typ, fn_image))
        # cv2 images are indexed (rows, cols, channels) = (height, width, channels)
        h, w, c = img.shape
        bbox['img_dim'] = (w, h, c)
        bbox['rel_xmin'] = bbox['xmin'] / float(w)
        bbox['rel_xmax'] = bbox['xmax'] / float(w)
        bbox['rel_ymin'] = bbox['ymin'] / float(h)
        bbox['rel_ymax'] = bbox['ymax'] / float(h)
        bbox['rel_width'] = bbox['rel_xmax'] - bbox['rel_xmin']
        bbox['rel_height'] = bbox['rel_ymax'] - bbox['rel_ymin']
        bboxes.append(bbox)
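Since the training patches are going to be centered on the bbox center, it may also be convenient to record the center coordinates; this is a small addition, not part of the loop above:
In [ ]:
for bbox in bboxes:
    # absolute and relative bbox center (assumed convenience fields)
    bbox['center_x'] = (bbox['xmin'] + bbox['xmax']) // 2
    bbox['center_y'] = (bbox['ymin'] + bbox['ymax']) // 2
    bbox['rel_center_x'] = (bbox['rel_xmin'] + bbox['rel_xmax']) / 2.0
    bbox['rel_center_y'] = (bbox['rel_ymin'] + bbox['rel_ymax']) / 2.0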
In [61]:
import pandas as pd
bboxes_df = pd.DataFrame(bboxes)
In [62]:
bboxes_df.head()
Out[62]:
In [63]:
print('min:', bboxes_df[['width', 'height']].min())
print('max:', bboxes_df[['width', 'height']].max())
bboxes_df[['width', 'height']].boxplot();
plt.show()
bboxes_df[['area']].boxplot();
plt.show()
In [64]:
print('min:', bboxes_df[['rel_width', 'rel_height']].min())
print('max:', bboxes_df[['rel_width', 'rel_height']].max())
bboxes_df[['rel_width', 'rel_height']].boxplot();
plt.show()
In [65]:
plt.scatter(bboxes_df['width'], bboxes_df['height'])
Out[65]:
In [66]:
plt.scatter(bboxes_df['rel_width'], bboxes_df['rel_height'])
Out[66]:
In [69]:
plt.scatter(bboxes_df['rel_width'], [d[0] for d in bboxes_df['img_dim']])
Out[69]:
In [10]:
# reload the last image processed in the loop above, so we can draw its bbox
img = cv2.imread(os.path.join(image_path, typ, fn_image))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
In [46]:
img.shape
Out[46]:
In [11]:
plt.imshow(img)
Out[11]:
In [18]:
rect_img = cv2.rectangle(img, (bbox['xmin'], bbox['ymin']), (bbox['xmax'], bbox['ymax']), (0, 255, 0))
plt.imshow(rect_img)
Out[18]:
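Once a patch classifier has been trained, the heatmap-and-argmax step described at the top could look roughly like this (a sketch: the sklearn-style predict_proba call, the stride, and the patch size are all assumptions):
In [ ]:
import numpy as np

def os_heatmap(img, classifier, patch_size=64, stride=16):
    """Score a sliding window with the (assumed) patch classifier and
    return the heatmap plus the highest-scoring patch center."""
    h, w = img.shape[:2]
    half = patch_size // 2
    heat = np.zeros(((h - patch_size) // stride + 1,
                     (w - patch_size) // stride + 1))
    for i, y in enumerate(range(0, h - patch_size + 1, stride)):
        for j, x in enumerate(range(0, w - patch_size + 1, stride)):
            patch = img[y:y + patch_size, x:x + patch_size]
            # assumed sklearn-style API returning P(patch contains the OS)
            heat[i, j] = classifier.predict_proba([patch.ravel()])[0, 1]
    i_max, j_max = np.unravel_index(heat.argmax(), heat.shape)
    center = (j_max * stride + half, i_max * stride + half)  # (x, y)
    return heat, center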
In [ ]: